Organizational Behavior: Geographic Distribution of Agricultural Units in Peru

Author

Elías Aburto Camacllanqui

Published

January 14, 2024

Introduction

Agricultural Unit

Objective: To visualize the geographic distribution of the agricultural units that participated in the 2019 National Agricultural Survey (ENA 2019) according to production size.

Methodology

Data: INEI

Survey: ENA 2019

Data preprocessing

#install.packages("mapsPERU")

library(plotly)
library(mapsPERU)
library(ggplot2)
library(tidyverse)
library(ggrepel)
library(dplyr) #para utilizar mutate
library(readr)
library(haven)

library(leaflet)
library(leaflet.extras)
library(rworldxtra)
library(raster)
library(sf)
library(tidyverse)

# Read the survey table for this analysis from its SPSS (.sav) file.
cap1200 <- haven::read_sav(file = "data/20_Cap1200.sav")

# Take the district-level table shipped with the mapsPERU package; it is the
# source of the coordinates used for the maps below.
df <- map_DIST

# Preview the district table.
print(df)
# A tibble: 1,891 × 13
   COD_REGION COD_DEPARTAMENTO COD_PROVINCIA COD_DISTRITO REGION   DEPARTAMENTO
   <chr>      <chr>            <chr>         <chr>        <chr>    <chr>       
 1 010000     010000           010100        010101       Amazonas Amazonas    
 2 010000     010000           010100        010102       Amazonas Amazonas    
 3 010000     010000           010100        010103       Amazonas Amazonas    
 4 010000     010000           010100        010104       Amazonas Amazonas    
 5 010000     010000           010100        010105       Amazonas Amazonas    
 6 010000     010000           010100        010106       Amazonas Amazonas    
 7 010000     010000           010100        010107       Amazonas Amazonas    
 8 010000     010000           010100        010108       Amazonas Amazonas    
 9 010000     010000           010100        010109       Amazonas Amazonas    
10 010000     010000           010100        010110       Amazonas Amazonas    
# ℹ 1,881 more rows
# ℹ 7 more variables: PROVINCIA <chr>, DISTRITO <chr>,
#   NOMBRE_CAPITAL_LEGAL <chr>, REGION_NATURAL <chr>, coords_x <dbl>,
#   coords_y <dbl>, geometry <MULTIPOLYGON [°]>
# Preview the survey table.
print(cap1200)
# A tibble: 29,555 × 30
    ANIO CCDD  NOMBREDD CCPP  NOMBREPV  CCDI  NOMBREDI CONGLOMERADO NSELUA UA   
   <dbl> <chr> <chr>    <chr> <chr>     <chr> <chr>    <chr>        <chr>  <chr>
 1  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00006  1    
 2  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00022  2    
 3  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00043  3    
 4  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00062  4    
 5  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00077  5    
 6  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00101  6    
 7  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00118  7    
 8  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00120  8    
 9  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00168  10   
10  2019 24    TUMBES   02    CONTRALM… 03    CANOAS … 00001        00186  11   
# ℹ 29,545 more rows
# ℹ 20 more variables: ESTRATO <dbl+lbl>, RESFIN <dbl+lbl>, REGION <dbl+lbl>,
#   DOMINIO <dbl+lbl>, FACTOR <dbl>, CODIGO <dbl+lbl>, P101A <dbl+lbl>,
#   P102_1 <dbl+lbl>, P102_2 <dbl+lbl>, P102_3 <dbl+lbl>, P1201 <dbl>,
#   P1201_EE <dbl>, P1201_N <dbl+lbl>, P1202_ENT <dbl>, P1202_DEC <chr>,
#   P1202_UM <dbl+lbl>, P1202_COD <dbl>, P1202_EQUIV_1 <dbl>,
#   P1202_EQUIV_2 <chr>, OMICAP1200 <chr>

We notice that the columns DEPARTAMENTO, PROVINCIA and DISTRITO in the mapsPERU table are written in mixed case and with accent marks, while the corresponding names in the cap1200 database are in upper case and without accent marks. We have to homogenize them.

We convert all variables that have lowercase characters to uppercase.

# Upper-case every character column of the district table so its place names
# use the same case as the names in cap1200.
# across() + where() replaces the superseded mutate_if() helper.
df <- df %>%
  mutate(across(where(is.character), toupper))
df
# A tibble: 1,891 × 13
   COD_REGION COD_DEPARTAMENTO COD_PROVINCIA COD_DISTRITO REGION   DEPARTAMENTO
   <chr>      <chr>            <chr>         <chr>        <chr>    <chr>       
 1 010000     010000           010100        010101       AMAZONAS AMAZONAS    
 2 010000     010000           010100        010102       AMAZONAS AMAZONAS    
 3 010000     010000           010100        010103       AMAZONAS AMAZONAS    
 4 010000     010000           010100        010104       AMAZONAS AMAZONAS    
 5 010000     010000           010100        010105       AMAZONAS AMAZONAS    
 6 010000     010000           010100        010106       AMAZONAS AMAZONAS    
 7 010000     010000           010100        010107       AMAZONAS AMAZONAS    
 8 010000     010000           010100        010108       AMAZONAS AMAZONAS    
 9 010000     010000           010100        010109       AMAZONAS AMAZONAS    
10 010000     010000           010100        010110       AMAZONAS AMAZONAS    
# ℹ 1,881 more rows
# ℹ 7 more variables: PROVINCIA <chr>, DISTRITO <chr>,
#   NOMBRE_CAPITAL_LEGAL <chr>, REGION_NATURAL <chr>, coords_x <dbl>,
#   coords_y <dbl>, geometry <MULTIPOLYGON [°]>

We remove accents from capital letters.

# Strip acute accents from the (already upper-cased) place-name columns.
# chartr() maps the i-th character of `old` to the i-th character of `new`,
# so the two arguments are plain character sets with no separators.
df <- df %>%
  mutate(across(
    c(DEPARTAMENTO, PROVINCIA, DISTRITO),
    function(x) chartr("ÁÉÍÓÚ", "AEIOU", x)
  ))

# Show the column names of the survey table.
names(cap1200)
 [1] "ANIO"          "CCDD"          "NOMBREDD"      "CCPP"         
 [5] "NOMBREPV"      "CCDI"          "NOMBREDI"      "CONGLOMERADO" 
 [9] "NSELUA"        "UA"            "ESTRATO"       "RESFIN"       
[13] "REGION"        "DOMINIO"       "FACTOR"        "CODIGO"       
[17] "P101A"         "P102_1"        "P102_2"        "P102_3"       
[21] "P1201"         "P1201_EE"      "P1201_N"       "P1202_ENT"    
[25] "P1202_DEC"     "P1202_UM"      "P1202_COD"     "P1202_EQUIV_1"
[29] "P1202_EQUIV_2" "OMICAP1200"   
# Give the survey's district-name column (NOMBREDI) the name used in df.
cap1200 <- cap1200 %>%
  rename(DISTRITO = NOMBREDI)

# Lookup from the CODIGO codes to the agricultural-unit size labels.
Cod_tipo <- setNames(
  c("Pequeña y mediana UA", "Grande UA"),
  c("1", "2")
)

# Turn CODIGO into a factor so it can be recoded with the lookup above.
cap1200[["CODIGO"]] <- as.factor(cap1200[["CODIGO"]])

# List the survey table's columns after the rename.
colnames(cap1200)
 [1] "ANIO"          "CCDD"          "NOMBREDD"      "CCPP"         
 [5] "NOMBREPV"      "CCDI"          "DISTRITO"      "CONGLOMERADO" 
 [9] "NSELUA"        "UA"            "ESTRATO"       "RESFIN"       
[13] "REGION"        "DOMINIO"       "FACTOR"        "CODIGO"       
[17] "P101A"         "P102_1"        "P102_2"        "P102_3"       
[21] "P1201"         "P1201_EE"      "P1201_N"       "P1202_ENT"    
[25] "P1202_DEC"     "P1202_UM"      "P1202_COD"     "P1202_EQUIV_1"
[29] "P1202_EQUIV_2" "OMICAP1200"   
# Recode the CODIGO factor through the Cod_tipo lookup (code -> size label).
cap1200$CODIGO <- recode_factor(cap1200$CODIGO, !!!Cod_tipo)


# Integrate both tables on the six-digit district code instead of the district
# name. A district name is not a unique key: the name-only join produced
# 35,704 coded rows from a 29,555-row survey table, i.e. survey units were
# copied onto every district that shares a name.
# NOTE(review): assumes df$COD_DISTRITO equals CCDD + CCPP + CCDI
# (department, province and district codes) -- confirm on a few districts.
#
# The survey's own DISTRITO column is dropped because df already carries one;
# this keeps the column names of ENA2019 the same as with the name-based join.
# dplyr:: is spelled out because raster is attached after dplyr and may mask
# select().
cap1200_by_code <- cap1200 %>%
  mutate(COD_DISTRITO = paste0(CCDD, CCPP, CCDI)) %>%
  dplyr::select(-DISTRITO)

ENA2019 <- left_join(df, cap1200_by_code, by = "COD_DISTRITO")

# List the columns of the joined table.
colnames(ENA2019)
 [1] "COD_REGION"           "COD_DEPARTAMENTO"     "COD_PROVINCIA"       
 [4] "COD_DISTRITO"         "REGION.x"             "DEPARTAMENTO"        
 [7] "PROVINCIA"            "DISTRITO"             "NOMBRE_CAPITAL_LEGAL"
[10] "REGION_NATURAL"       "coords_x"             "coords_y"            
[13] "geometry"             "ANIO"                 "CCDD"                
[16] "NOMBREDD"             "CCPP"                 "NOMBREPV"            
[19] "CCDI"                 "CONGLOMERADO"         "NSELUA"              
[22] "UA"                   "ESTRATO"              "RESFIN"              
[25] "REGION.y"             "DOMINIO"              "FACTOR"              
[28] "CODIGO"               "P101A"                "P102_1"              
[31] "P102_2"               "P102_3"               "P1201"               
[34] "P1201_EE"             "P1201_N"              "P1202_ENT"           
[37] "P1202_DEC"            "P1202_UM"             "P1202_COD"           
[40] "P1202_EQUIV_1"        "P1202_EQUIV_2"        "OMICAP1200"          

Results

1. We create the base map.

# Empty leaflet widget with the default tile layer added.
addTiles(leaflet())

2. We add the coordinates of all Agricultural units.

# Base map plus circles for the rows of ENA2019, positioned with the
# coords_x (longitude) and coords_y (latitude) columns.
leaflet() %>%
  addTiles() %>%
  addCircles(
    lng = ~coords_x,
    lat = ~coords_y,
    data = ENA2019
  )

3. We add colors to identify the size of the agricultural units.

## Colour set-up
# Distinct producer types (agricultural-unit sizes) present in CODIGO.
# Any district row left without a survey match by the left join has
# CODIGO = NA; NA is not a producer type, so it is dropped before the types
# are listed and counted.
tpp_Names <- unique(ENA2019$CODIGO[!is.na(ENA2019$CODIGO)])

# Number of producer types.
Number_tpp <- length(tpp_Names)

## The colours for the sizes of the agricultural units.
# NOTE(review): three colours are listed for two size categories -- confirm
# which of them colorFactor() actually assigns.
Colores <- c('#e41a1c', '#377eb8', '#4daf4a')

# Number of rows per size category.
table(ENA2019$CODIGO)

Pequeña y mediana UA            Grande UA 
               33857                 1847 
# Build the function that maps each producer type to a colour of the palette.
pal <- colorFactor(palette = Colores, domain = tpp_Names)

## Map with circles coloured by CODIGO; fillOpacity sets the transparency of
## the circle fill.
leaflet() %>%
  addTiles() %>%
  addCircles(
    lng = ~coords_x,
    lat = ~coords_y,
    color = ~pal(CODIGO),
    fillOpacity = 0.5,
    data = ENA2019
  )

4. We add labels to agricultural units.

# Coloured map whose circles also show their CODIGO value as a label. The
# circles are placed in the "Codigo" group.
p <- addCircles(
  addTiles(leaflet()),
  data = ENA2019,
  lng = ~coords_x,
  lat = ~coords_y,
  color = ~pal(CODIGO),
  fillOpacity = 0.5,
  label = ~CODIGO,
  group = "Codigo"
)

# Display the map.
p

5. We generate a legend.

## Add a legend for the CODIGO colours in the bottom-right corner, in its own
## "Leyenda" group.
p <- addLegend(
  p,
  position = "bottomright",
  pal = pal,
  values = ~CODIGO,
  title = "Tipos de productores",
  opacity = 0.8,
  group = "Leyenda",
  data = ENA2019
)

# Display the map with its legend.
p

6. We add layers according to the aspects of interest.

## Layer selection: add a control for the "Codigo" and "Leyenda" overlay
## groups, shown expanded.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
p <- p %>%
  addLayersControl(
    overlayGroups = c("Codigo", "Leyenda"),
    options = layersControlOptions(collapsed = FALSE)
  )
p

Conclusions

It can be observed that the majority of agricultural units in Peru have small and medium production sizes. In addition, they are mostly located in the central region of Peru.